#Load the nestling measurements; `header` is spelled out in full because `h=` only works through partial argument matching
df <- read.csv("CrowNestlingClimate.csv", header = TRUE)#n=2394 nestlings
Anne went through CrowNestlingClimate.csv and identified runts and
nestlings with untrustworthy measurements, which she outlined in
“SoCalledRuntNestlingsByYear.xlsx”. Below, I checked for the presence of
those nestlings and removed them from the df.
#Remove nestlings identified by Anne (runts and/or suspect measurements)
#file name: Runt Nestlings so-called by year
#Each lookup below prints the matching ID(s) when the nestling is present in df
#and character(0) when it is not; the "##" lines are the recorded output.
#Only the IDs marked "in" are passed to the removal filter that follows these checks.
df$ID[df$ID=="FFW-S DRIN02"]#in, remove below
## [1] "FFW-S DRIN02"
df$ID[df$ID=="RFA-S ALOT02"]#not in
## character(0)
df$ID[df$ID=="5 STAD96"]#not in
## character(0)
df$ID[df$ID=="Y-5 ROTA98"]#not in
## character(0)
df$ID[df$ID=="B3 WINK98"]#in, remove below
## [1] "B3 WINK98"
df$ID[df$ID=="B-1 WINK98"]#not in
## character(0)
df$ID[df$ID=="GL JSUP05"]#in, remove below
## [1] "GL JSUP05"
df$ID[df$ID=="CV WKAY05"]#not in
## character(0)
df$ID[df$ID=="YB-OSW HORC10"]#not in
## character(0)
df$ID[df$ID=="FE WCRI18"]#in, remove below
## [1] "FE WCRI18"
df$ID[df$ID=="Q4 CARE11"]#in, remove below
## [1] "Q4 CARE11"
df$ID[df$ID=="OD CCPL02"]#not in
## character(0)
df$ID[df$ID=="*00 WCRI14"]#not in
## character(0)
df$ID[df$ID=="JC NDYC17"]#in, remove below
## [1] "JC NDYC17"
df$ID[df$ID=="Dead under nest MIPA14"]#not in
## character(0)
df$ID[df$ID=="L-2 KRUM93"]#in, remove below
## [1] "L-2 KRUM93"
df$ID[df$ID=="orange SWEG91"]#in, remove below
## [1] "orange SWEG91"
df$ID[df$ID=="SWB-W BURP03"]#in, remove below
## [1] "SWB-W BURP03"
df$ID[df$ID=="3 PINW00"]#in, remove below
## [1] "3 PINW00"
df$ID[df$ID=="4 PINW00"]#in, remove below
## [1] "4 PINW00"
df$ID[df$ID=="DOA RGBY00"]#in, remove below
## [1] "DOA RGBY00"
df$ID[df$ID=="doa SEPG04"]#in, remove below
## [1] "doa SEPG04"
df$ID[df$ID=="2-r HORC04"]#in, remove below
## [1] "2-r HORC04"
df$ID[df$ID=="doa CLAR05"]#in, remove below
## [1] "doa CLAR05"
df$ID[df$ID=="doa HOMC05"]#in, remove below
## [1] "doa HOMC05"
df$ID[df$ID=="doa JUDD06"]#in, remove below
## [1] "doa JUDD06"
df$ID[df$ID=="doa1 KAYS06"]#in, remove below
## [1] "doa1 KAYS06"
df$ID[df$ID=="doa2 KAYS06"]#in, remove below
## [1] "doa2 KAYS06"
df$ID[df$ID=="doa3 KAYS06"]#in, remove below
## [1] "doa3 KAYS06"
df$ID[df$ID=="doa1 BROT07"]#in, remove below
## [1] "doa1 BROT07"
df$ID[df$ID=="doa2 BROT07"]#in, remove below
## [1] "doa2 BROT07"
df$ID[df$ID=="doa1 NMUR07"]#in, remove below
## [1] "doa1 NMUR07"
df$ID[df$ID=="doa2 NMUR07"]#in, remove below
## [1] "doa2 NMUR07"
df$ID[df$ID=="doa3 NMUR07"]#in, remove below
## [1] "doa3 NMUR07"
df$ID[df$ID=="doa4 NMUR07"]#in, remove below
## [1] "doa4 NMUR07"
df$ID[df$ID=="doa5 NMUR07"]#in, remove below
## [1] "doa5 NMUR07"
df$ID[df$ID=="X0 YFER00"]#identified in DuplicateNestlings file, remove below (*2 obs)
## [1] "X0 YFER00" "X0 YFER00"
#Drop the 28 IDs found in df above; this removes 29 rows because "X0 YFER00" has two observations
df <- filter(
  df,
  !(ID %in% c(
    "FFW-S DRIN02", "B3 WINK98", "GL JSUP05", "Q4 CARE11", "JC NDYC17",
    "L-2 KRUM93", "orange SWEG91", "SWB-W BURP03", "3 PINW00", "4 PINW00",
    "doa SEPG04", "2-r HORC04", "doa CLAR05", "doa HOMC05", "doa JUDD06",
    "doa1 KAYS06", "doa2 KAYS06", "doa3 KAYS06", "doa1 BROT07", "doa2 BROT07",
    "doa1 NMUR07", "doa2 NMUR07", "doa3 NMUR07", "doa4 NMUR07", "doa5 NMUR07",
    "FE WCRI18", "DOA RGBY00", "X0 YFER00"
  ))
)#new n=2365
Below, I identified nestlings that happened to be measured twice (for
a variety of reasons). I sent that list to Anne, and she specified which
observations to keep in “DuplicateNestlings-ABCAnnotate.xlsx”
#Collect every row whose ID occurs more than once in df
duplicates.df <- filter(group_by(df, ID), n() > 1)
#This list of duplicates was sent to Anne
#write.csv(duplicates.df, "DuplicateNestlings.csv")
#Drop every row of nest SEPG04 per Anne's request
df <- filter(df, NestName != "SEPG04")#4 nestlings (new n=2361)
#For GYMN08, drop the rows with BandDateJul 126 so the second measurement is the one kept (per Anne's request)
df <- subset(df, !(NestName == "GYMN08" & BandDateJul == "126"))#new n=2359
#Recompute the duplicated IDs after those removals (new n=38)
duplicates.df <- filter(group_by(df, ID), n() > 1)
#Code check on duplicates.df: order rows by NestName and CalcAge, then keep the first row of each ID
#within a Year (i.e. the lowest CalcAge, as long as an ID's rows share one NestName).
#group_by(Year) does not influence arrange(); it only makes distinct() operate per Year.
#ungroup() removes that grouping afterwards so it does not carry over into later steps.
duplicates.df <- duplicates.df %>%
  group_by(Year) %>%
  arrange(NestName, CalcAge) %>%
  dplyr::distinct(ID, .keep_all = TRUE) %>%
  ungroup()#new n=19
#Apply the same ordering and first-row-per-ID rule to df itself
df <- df %>%
  group_by(Year) %>%
  arrange(NestName, CalcAge) %>%
  dplyr::distinct(ID, .keep_all = TRUE) %>%
  ungroup()#new n=2340 (19 fewer than above, so code works)
summary(duplicated(df$ID))#no more duplicates
## Mode FALSE
## logical 2340
#Keep only the analysis variables and shorten some names in the same step (new name = old name)
df <- select(
  df,
  Year, Name, NestName, ID,
  FieldAge = AgeField,
  CalcAge, HatchDateJul, HatchDateJulYear, AllSex,
  BNT = BillNT, BW = BillWidth, BD = BillDepth,
  TEC, Head,
  UB = UpperBill, UBS = UBillSurface, TBS = TotBillSurface,
  Skull, Tarsus, Weight
)
#Count NAs per column; vapply() guarantees exactly one integer per column, whatever the input
countNAs <- vapply(df, function(x) sum(is.na(x)), integer(1))
countNAs
## Year Name NestName ID
## 0 3 0 0
## FieldAge CalcAge HatchDateJul HatchDateJulYear
## 161 3 3 3
## AllSex BNT BW BD
## 0 3 5 4
## TEC Head UB UBS
## 3 4 1 1
## TBS Skull Tarsus Weight
## 0 7 4 10
#Remove rows with an NA in any of the listed measurements
#(written as a plain filter() because filter_at()/all_vars() are superseded in dplyr)
df <- df %>%
  filter(
    !is.na(Weight),
    !is.na(HatchDateJul),
    !is.na(BD),
    !is.na(Tarsus),
    !is.na(Skull),
    !is.na(BW)
  )#new n=2323
#Recount NAs per column; vapply() guarantees exactly one integer per column, whatever the input
countNAs <- vapply(df, function(x) sum(is.na(x)), integer(1))
countNAs#still 3 NAs in name for nestlings named "NA"
## Year Name NestName ID
## 0 3 0 0
## FieldAge CalcAge HatchDateJul HatchDateJulYear
## 159 0 0 0
## AllSex BNT BW BD
## 0 0 0 0
## TEC Head UB UBS
## 0 0 0 0
## TBS Skull Tarsus Weight
## 0 0 0 0
#Nestlings whose name is literally "NA" show up with a missing Name; relabel them "N_A" so R doesn't treat the name as missing
df <- mutate(df, Name = replace(Name, is.na(Name), "N_A"))
#Keep only nestlings with Weight above 160 (a Weight of exactly 160 is dropped as well)
df <- filter(df, Weight > 160)#new n=2313
range(df$Weight)
## [1] 163 500
#Keep CalcAge from 24 to 30 days, both ends included
df <- filter(df, CalcAge >= 24 & CalcAge <= 30)#new n=2035
range(df$CalcAge)
## [1] 24.0 29.9
#Re-sort df by year, nest and nestling ID
df <- arrange(df, Year, NestName, ID)
#Interactive scatterplot of Weight against FieldAge, coloured by HatchDateJul
#label = ID is not drawn by geom_point(); presumably it is supplied for the ggplotly() hover text
WeightByFieldAge.plot <- ggplot(data = df, aes(x = FieldAge, y = Weight, label = ID, color = HatchDateJul)) +
  geom_point()
ggplotly(WeightByFieldAge.plot)
#df is already restricted to CalcAge 24-30 by the earlier filter, so that filter is not repeated here; only the range is re-checked
range(df$CalcAge)
## [1] 24.0 29.9
#Interactive scatterplot of Weight against CalcAge
WeightByCalcAge.plot <- ggplot(data = df, aes(x = CalcAge, y = Weight, label = ID)) +
  geom_point()
ggplotly(WeightByCalcAge.plot)
#Load the climate metrics (`header` spelled out instead of the partially matched `h=`)
climate.df <- read.csv("ClimateMetrics.csv", header = TRUE)
#Attach the climate columns to each nestling by HatchDateJulYear
n_before_join <- nrow(df)
df <- left_join(df, climate.df, by = "HatchDateJulYear")
#A left join should not add rows; extra rows would mean repeated HatchDateJulYear keys in climate.df
stopifnot(nrow(df) == n_before_join)
#Move Date so that it sits directly before AllSex
df <- df %>% relocate(Date, .before = AllSex)
#write.csv(df, "AllNestlingsClimateJoined.csv")
#Columns 1-10 are left on their original scale
#NOTE(review): the split is positional, so it depends on the column order produced by the select/join/relocate steps - confirm if those change
DataNotScaled.df <- df[, seq_len(10)]
#Columns 11-39 hold the numeric variables that get scaled
DataToScale.df <- df[, seq(11, 39)]
#scale() with its default arguments centres each column and divides it by its standard deviation; the result is a matrix
Scaled.df <- scale(x = DataToScale.df)
#Bind the unscaled and scaled columns back together (scaled.df and Scaled.df differ only by case)
scaled.df <- cbind(DataNotScaled.df, Scaled.df)
#Linear model of BD (bill depth) from scaled.df on the GDDSum12_22 x PrecipSum12_22 interaction, with Weight and CalcAge as covariates
BD.scaled.mdl <- lm(
  BD ~ GDDSum12_22 * PrecipSum12_22 + Weight + CalcAge,
  data = scaled.df
)
summary(BD.scaled.mdl)
##
## Call:
## lm(formula = BD ~ GDDSum12_22 * PrecipSum12_22 + Weight + CalcAge,
## data = scaled.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.0363 -0.4870 -0.0231 0.4569 3.6824
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.32054 0.42505 -10.165 < 2e-16 ***
## GDDSum12_22 0.14162 0.01712 8.275 2.30e-16 ***
## PrecipSum12_22 -0.01036 0.01687 -0.614 0.539
## Weight 0.57132 0.01809 31.580 < 2e-16 ***
## CalcAge 0.16540 0.01624 10.187 < 2e-16 ***
## GDDSum12_22:PrecipSum12_22 -0.07457 0.01569 -4.753 2.14e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7515 on 2029 degrees of freedom
## Multiple R-squared: 0.4366, Adjusted R-squared: 0.4352
## F-statistic: 314.4 on 5 and 2029 DF, p-value: < 2.2e-16